20220623_10x_ZKW

analysis of scEOS

demo data(1/10~1/5 datasize)

loading 18,000 cells, expected 8,000 cells, but only called 377 cells

because of the small number of called cells:
first to try filtered matrix normally
second to try raw matrix and manually call cells using ‘UMI counts > 100’
get 3,000 EOS cells more but with very low counts, it should be fine after increasing the datasize

plus data(full datasize)
1. QC and initial analysis

cell calling >3.6k and EOS >90%
with other celltypes(Epthelium/Fibroblast/Macrophage/DC/T) together

initial clustering couldn’t separate EOS into distinct subclusters
more likely grouped by nFeature levels instead
hard to give a conclusion to match bulk RNAseq data

2. cleaning-up and re-clustering

only EOS kept
naturally grouped into Nmur1+ and Nmur1- parts
seems like five states: Nmur1+ EOS1/2, Nmur1- EOS3/4/5
check markers and then merge into three only: Nmur1+ EOS1, Nmur1- EOS2/3

3. trajectory

check monocle and RNAvelocity

source("/Shared_win/projects/RNA_normal/analysis.10x.r")

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus/filtered_feature_bc_matrix/")
#GEX <- filt.10x$`Gene Expression`
#FB <- filt.10x$`Antibody Capture`
# GEX only
GEX <- filt.10x

check datasets

dim(GEX)
## [1] 32285  3924
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAGTAACATAG-1 AAACCCAGTCGAATGG-1 AAACCCATCCTCTAAT-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACGAAAGTGCGACA-1 AAACGAACATGACAAA-1 AAACGAACATGGCCCA-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "EOS_plus")
GEX.seur
## An object of class Seurat 
## 14783 features across 3853 samples within 1 assay 
## Active assay: RNA (14783 features, 0 variable features)

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX),value = T)
MT_filt <- MT_gene %in% rownames(GEX.seur@assays[['RNA']]@counts)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene[MT_filt])


# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.01) + geom_point(alpha=0.1)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plota + plotb

at first try a loose cutoff

#GEX.seur <- subset(GEX.seur, subset = percent.mt < 5 & nFeature_RNA < 4000 & nCount_RNA < 20000)
GEX.seur <- subset(GEX.seur, subset = percent.mt < 10)
GEX.seur
## An object of class Seurat 
## 14783 features across 3776 samples within 1 assay 
## Active assay: RNA (14783 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.01) + geom_point(alpha=0.1)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plota + plotb

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: Pimreg, not
## searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), 
        #group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

nearly no cycling

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = TRUE)
plot1 + plot2

head(VariableFeatures(GEX.seur), 100)
##   [1] "mt-Co1"        "Krt8"          "Lgals4"        "mt-Atp6"      
##   [5] "Epcam"         "mt-Cytb"       "mt-Co3"        "Pigr"         
##   [9] "Krt19"         "Lgals2"        "Muc13"         "mt-Co2"       
##  [13] "Cldn7"         "mt-Nd1"        "Malat1"        "Lypd8"        
##  [17] "Pycard"        "Prap1"         "Hbegf"         "Phgr1"        
##  [21] "Spint2"        "mt-Nd4"        "2200002D01Rik" "Gpx2"         
##  [25] "Ccl25"         "Gna11"         "Cdh17"         "Cldn3"        
##  [29] "Elf3"          "mt-Nd2"        "Hadh"          "Dbi"          
##  [33] "Slc25a5"       "Smim24"        "Tspan8"        "Atp1b1"       
##  [37] "Uqcr11"        "Oat"           "Crip1"         "Chchd10"      
##  [41] "Aldob"         "Rps18"         "Ces2e"         "Aldh1b1"      
##  [45] "Prdx1"         "Ckmt1"         "Atp5g3"        "Atp5g1"       
##  [49] "Myo1a"         "Txn1"          "Cdh1"          "Atpif1"       
##  [53] "Uqcrq"         "Aoc1"          "Rps2"          "Tm4sf20"      
##  [57] "Rpl35"         "Plac8"         "Ccn1"          "Misp"         
##  [61] "Ppa1"          "Maoa"          "Lars2"         "Atp5o"        
##  [65] "Ugt2b34"       "Rpl10a"        "Cox4i1"        "Rpl28"        
##  [69] "Abcd3"         "Vil1"          "Cyc1"          "Sult1d1"      
##  [73] "Id3"           "St3gal4"       "Cldn15"        "Tm4sf5"       
##  [77] "Serpinb1a"     "Fabp2"         "Cox7c"         "Sfn"          
##  [81] "Cox7b"         "Serpinb6a"     "Arg2"          "Atp5b"        
##  [85] "Mgst3"         "Sdc4"          "Apol10a"       "Dstn"         
##  [89] "Fabp1"         "Cd74"          "Cox5b"         "Cdhr5"        
##  [93] "Eef1a1"        "Rps19"         "Ndufa4"        "Mgst2"        
##  [97] "Mgst1"         "Mttp"          "H2-Aa"         "Anxa4"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes,  and more possible contamination genes

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Lars2|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist|^0|^1|^2|^3|^4|^5|^6|^7|^8|^9",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


GEX.seur <- RunPCA(GEX.seur, features = setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) ))
## PC_ 1 
## Positive:  Crem, Il6, Ccl9, Cxcl10, Nr4a2, Vcam1, Emb, Pax5, Ccl5, Wfdc17 
##     Lyz2, Cd34, Saa3, Vps37b, Ms4a4c, Klrb1b, Rsad2, Klrk1, Ctla2b, Ctla4 
##     Cx3cr1, Il2ra, Cd28, Olfm4, Cd209a, C1qb, C1qa, Trf, Itgae, Il2rb 
## Negative:  Atp8b1, Dsg2, Pcsk5, Cldn7, Sult1d1, Gpx2, Prr15l, Epcam, Tm4sf5, Prap1 
##     Ckmt1, Prlr, Lgals4, Mgst3, Pigr, Smim22, Aldob, Lmo7, Fbp2, Trim2 
##     Aoc1, Lad1, H2-T3, Vil1, Ces2e, Sgpp2, Chchd10, Lypd8, Degs2, Aadac 
## PC_ 2 
## Positive:  Prap1, Aoc1, Spink1, Ces2e, Sult1d1, Sis, Mgam, Slc51a, Slc5a1, Apob 
##     Pls1, Vil1, Fbp2, Apoa1, H2-T3, Apol10a, Cdhr5, Cda, Myo1a, Fabp2 
##     Lgals4, Cyp4f14, Npc1l1, Krt20, Ces2c, Aldob, Btnl1, Otc, Maoa, Gpd1 
## Negative:  Serpinh1, Igfbp7, Col1a2, Fkbp9, Col6a2, Col6a1, Rarres2, Gpx3, Mmp14, Lamc1 
##     Col4a1, Sparc, Col3a1, Serping1, Pmp22, Dcn, C1s1, Col14a1, Cygb, Efemp1 
##     Col6a3, Fstl1, Dlc1, Ltbp4, Lpar1, Col5a1, Pmepa1, Col1a1, Plpp3, Fxyd1 
## PC_ 3 
## Positive:  Dpep1, Lrp1, Apoa1, Spink1, Pcsk5, Apob, Enpep, Slc5a1, Mep1b, Guca2b 
##     Col1a2, Igfbp7, Sis, Anpep, Dcn, Rarres2, Cdhr5, Npc1l1, Col6a1, Col6a2 
##     Col6a3, Col3a1, Btnl1, Bgn, Lct, Asah2, Reg3b, Cygb, Ces2e, Col5a1 
## Negative:  H2-Eb1, H2-Aa, H2-Ab1, Cd74, Ctss, Apoe, Cd83, Apol7c, C1qa, C1qc 
##     C1qb, Dnase1l3, Ifi30, Jaml, AW112010, Lyz2, Got1, Irf8, Cxcl16, Pld4 
##     Ctsc, Eef1a1, Mafb, Plbd1, Arl5c, Serpinb6b, Cd38, Sdc4, Serpinb9, Ctsb 
## PC_ 4 
## Positive:  H2-Ab1, H2-Eb1, H2-Aa, Mafb, Cd74, Apoe, Ctss, C1qa, C1qc, C1qb 
##     Selenop, Cd83, Apol7c, Cxcl16, AW112010, Dnase1l3, Lyz2, Jaml, Apoa1, Irf8 
##     Got1, Anpep, Maf, Plbd1, Ctsc, Pld4, Pxdc1, Exoc3l4, Slc5a1, Arl5c 
## Negative:  Tspan1, Agr2, Slc12a2, Fxyd3, Slc12a8, Kcnk1, Prom1, Sox9, Clmn, Rab27b 
##     Bace2, Spdef, Ern2, Tspan8, Gcnt3, Ttc39a, Klk1, Creb3l4, Arfgef3, Galnt12 
##     Foxa3, Bcas1, Rap1gap, Pdia5, Galnt3, Rasef, Mlph, Smim6, Krt18, Spats2l 
## PC_ 5 
## Positive:  Apoe, C1qa, C1qc, Mafb, C1qb, Ctss, H2-Aa, H2-Eb1, H2-Ab1, Agr2 
##     Cd74, Selenop, Cxcl16, Apol7c, Fermt1, Cd83, Dnase1l3, Slc12a2, Hid1, Ern2 
##     Gcnt3, Slc12a8, Fcgbp, Fxyd3, Rab27b, Jaml, Tff3, Smim6, Spink4, Klk1 
## Negative:  Gapdh, Eef1a1, Fau, Rack1, Ybx1, Txn1, Ppia, Eno1, Uba52, Cox6c 
##     Naca, Eef1g, Uqcrq, Atp5e, Tomm7, Gnas, Calm1, Cox7a2, Atp5j, Ndufb1-ps 
##     Atp5md, Gpx1, Atp5h, Aldh1b1, Ldha, Atp5mpl, Hmgb1, Eef1b2, Atp5g1, Tpi1
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ))
## [1] 1366
head(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ))
## [1] "Krt8"   "Lgals4" "Epcam"  "Pigr"   "Krt19"  "Lgals2"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2) +
  DimPlot(GEX.seur, reduction = "pca",dims = 2:3)

DimHeatmap(GEX.seur, dims = 1:16, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 40)

PCs <- 1:15
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 10)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3776
## Number of edges: 49556
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7644
## Number of communities: 8
## Elapsed time: 0 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:03:11 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:03:11 Read 3776 rows and found 15 numeric columns
## 15:03:11 Using Annoy for neighbor search, n_neighbors = 10
## 15:03:11 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:03:11 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpkvITdm\file5070266777a1
## 15:03:11 Searching Annoy index using 1 thread, search_k = 1000
## 15:03:12 Annoy recall = 100%
## 15:03:12 Commencing smooth kNN distance calibration using 1 thread
## 15:03:13 Initializing from normalized Laplacian + noise
## 15:03:13 Commencing optimization for 500 epochs, with 53866 positive edges
## 15:03:20 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA"),ncol = 2)

check markers

DotPlot(GEX.seur, features = rev(c("Ptprc",
                                   "Cd3d","Cd3e",
                                   "Fcer1g",
                                   "Itgam","Ccr3","Fcgr3","Siglecf",
                                   "Adgre1","Itga4","Il5ra",
                                   "Nmur1",
                                   "Il1b","Ccl4","Ccl6","Csf1",
                                   "Pecam1",
                                   "Epcam","Tspan8","Krt19","Cldn7",
                                   "Cd3g","Ifng","Ramp3","Ctla4",
                                   "Dcn","Col1a1","Col1a2","Col3a1",
                                   "Rgl1",
                                   "Ly6e","Siglech",
                                   "Itgae","Pkib","Havcr2","Ly6c2",
                                   "H2-DMb1","H2-Eb1","H2-Ab1","H2-Aa",
                                   "Apoe","Lyz2","Ms4a6c","Mafb",
                                   "C1qa","C1qb","C1qc")))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

find markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "seurat_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.1,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("10x_ZKW_GEX.markers.pre.0705.csv", header = TRUE, sep = ",")
GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
## # A tibble: 64 x 7
## # Groups:   cluster [8]
##       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene 
##       <dbl>      <dbl> <dbl> <dbl>     <dbl>   <int> <chr>
##  1 8.10e-95      0.789 0.88  0.522  1.20e-90       0 Gapdh
##  2 1.56e-92      0.635 0.987 0.783  2.31e-88       0 Rpl41
##  3 2.19e-89      0.634 0.992 0.85   3.24e-85       0 Rps29
##  4 3.94e-85      0.668 0.939 0.653  5.83e-81       0 Rps27
##  5 3.94e-77      0.590 0.802 0.462  5.82e-73       0 Rpl37
##  6 2.98e-70      0.764 0.699 0.363  4.40e-66       0 Rps2 
##  7 7.28e-60      0.637 0.641 0.34   1.08e-55       0 Rps19
##  8 4.28e-42      0.585 0.849 0.617  6.32e-38       0 Ccl6 
##  9 1.18e-45      0.565 1     0.993  1.74e-41       1 Fth1 
## 10 3.56e-31      0.493 0.974 0.908  5.26e-27       1 Tgm2 
## # ... with 54 more rows
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
                          levels = levels(GEX.seur$seurat_clusters))

markers.pre_t8 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  #filter(pct.1>0.25 & !(gene %in% c(MT_gene,DIG, CC_gene))) %>%
                    filter(pct.1>0.1 & !(gene %in% grep("^Rps|^Rpl",rownames(GEX.seur),value = T))) %>%
                   top_n(n = 8, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t16 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  #filter(pct.1>0.25 & !(gene %in% c(MT_gene,DIG, CC_gene))) %>%
                    filter(pct.1>0.1 & !(gene %in% grep("^Rps|^Rpl",rownames(GEX.seur),value = T))) %>%
                   top_n(n = 16, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t32 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  #filter(pct.1>0.1 & !(gene %in% c(MT_gene,DIG, CC_gene))) %>%
                    filter(pct.1>0.1 & !(gene %in% grep("^Rps|^Rpl",rownames(GEX.seur),value = T))) %>%
                   top_n(n = 32, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t48 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  #filter(pct.1>0.1 & !(gene %in% c(MT_gene,DIG, CC_gene))) %>%
                    filter(pct.1>0.1 & !(gene %in% grep("^Rps|^Rpl",rownames(GEX.seur),value = T))) %>%
                   top_n(n = 48, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
DotPlot(GEX.seur, features = rev(markers.pre_t48[1:64]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t48[65:128]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t48[129:192]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t48[193:256]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t48[257:320]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t48[321:336]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

FeaturePlot(GEX.seur,features = markers.pre_t48[1:64])

FeaturePlot(GEX.seur,features = markers.pre_t48[65:128])

FeaturePlot(GEX.seur,features = markers.pre_t48[129:192])

FeaturePlot(GEX.seur,features = markers.pre_t48[193:256])

FeaturePlot(GEX.seur,features = markers.pre_t48[257:320])

FeaturePlot(GEX.seur,features = markers.pre_t48[321:336])

check markers

DotPlot(GEX.seur, features = rev(c("Ptprc",
                                   "Cd3d","Cd3e",
                                   "Fcer1g",
                                   "Itgam","Ccr3","Fcgr3","Siglecf",
                                   "Adgre1","Itga4","Il5ra",
                                   "Nmur1",
                                   "Il1b","Ccl4","Ccl6","Csf1",
                                   "Pecam1",
                                   "Epcam","Tspan8","Krt19","Cldn7",
                                   "Cd3g","Ifng","Ramp3","Ctla4",
                                   "Dcn","Col1a1","Col1a2","Col3a1",
                                   "Rgl1",
                                   "Ly6e","Siglech",
                                   "Itgae","Pkib","Havcr2","Ly6c2",
                                   "H2-DMb1","H2-Eb1","H2-Ab1","H2-Aa",
                                   "Apoe","Lyz2","Ms4a6c","Mafb",
                                   "C1qa","C1qb","C1qc")))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

length(c("Ptprc",
                                   "Cd3d","Cd3e",
                                   "Fcer1g",
                                   "Itgam","Ccr3","Fcgr3","Siglecf",
                                   "Adgre1","Itga4","Il5ra",
                                   "Nmur1",
                                   "Il1b","Ccl4","Ccl6","Csf1",
                                   "Pecam1",
                                   "Epcam","Tspan8","Krt19","Cldn7",
                                   "Cd3g","Ifng","Ramp3","Ctla4",
                                   "Dcn","Col1a1","Col1a2","Col3a1",
                                   "Rgl1",
                                   "Ly6e","Siglech",
                                   "Itgae","Pkib","Havcr2","Ly6c2",
                                   "H2-DMb1","H2-Eb1","H2-Ab1","H2-Aa",
                                   "Apoe","Lyz2","Ms4a6c","Mafb",
                                   "C1qa","C1qb","C1qc"))
## [1] 47
FeaturePlot(GEX.seur, features = c("Ptprc",
                                   "Cd3d","Cd3e",
                                   "Fcer1g",
                                   "Itgam","Ccr3","Fcgr3","Siglecf",
                                   "Adgre1","Itga4","Il5ra",
                                   "Nmur1",
                                   "Il1b","Ccl4","Ccl6","Csf1",
                                   "Pecam1",
                                   "Epcam","Tspan8","Krt19","Cldn7",
                                   "Cd3g","Ifng","Ramp3","Ctla4",
                                   "Dcn","Col1a1","Col1a2","Col3a1",
                                   "Rgl1",
                                   "Ly6e","Siglech",
                                   "Itgae","Pkib","Havcr2","Ly6c2",
                                   "H2-DMb1","H2-Eb1","H2-Ab1","H2-Aa",
                                   "Apoe","Lyz2","Ms4a6c","Mafb",
                                   "C1qa","C1qb","C1qc"), ncol = 4)

FeaturePlot(GEX.seur, features = c("Cavin1","Pmp22","Loxl2","Sfrp1",
                                   "Bmp1","Dlc1","Dpt","Ogn",
                                   "Abca8a","Lamc1","Fxyd1","Bmp4",
                                   "Cxcr6","Gimap3","Sh2d2a","Zap70",
                                   "Ccl11","Fbln5","Lpl","Ddr2",
                                   "Cygb","Bicc1","Eln","Clec3b",
                                   "Tnxb","Ehd2","Meg3","Il2rb",
                                   "Tns1","Lamb1","Loxl1","Nid1"), ncol = 4)

FeaturePlot(GEX.seur, features = c("Mmp2","Lama4","Lama2","Dkk3",
                                   "Igfbp3","Cxcl12","Axl","Adamts1",
                                   "Wnt4a","Mrc2","C3","Hmcn2",
                                   "Ackr3","Gulp1","Gja1","Pdgfra",
                                   "Ctsw","Rcn3","Mgp","Cavin2",
                                   "Cavin3","Rbms3","Nnmt","Spon2",
                                   "Lhfp","Tcf21","Nkg7","Akap12",
                                   "Nt5e","Trbc1","Amotl1","Cryab"), ncol = 4)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: Wnt4a

table(GEX.seur$seurat_clusters)
## 
##   0   1   2   3   4   5   6   7 
## 919 832 658 613 545 108  66  35
sl_stat <- table(GEX.seur$seurat_clusters)
barplot(sl_stat,ylim = c(0,1200),col = c(scales::hue_pal()(8)),
        main = "cluster statistics",cex.names = 0.75)
text(x=1:8*1.2-0.45,y=sl_stat+51,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

sl_stat <- table(GEX.seur$seurat_clusters)
barplot(sl_stat,ylim = c(0,1200),col = c(scales::hue_pal()(8))[c(rep(1,5),6:8)],
        main = "cluster statistics",cex.names = 0.75)
text(x=1:8*1.2-0.45,y=sl_stat+51,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

sum(table(GEX.seur$seurat_clusters)[1:5])
## [1] 3567
sum(table(GEX.seur$seurat_clusters))
## [1] 3776
sum(table(GEX.seur$seurat_clusters)[1:5])/sum(table(GEX.seur$seurat_clusters))
## [1] 0.9446504
DotPlot(GEX.seur, features = rev(c("Klra17",
                                   "Nmur1",
                                   "Cdh17","Clec4a4","Dpep2","Ffar1",
                                   "F2rl3","Sirpb1a","Cyp1b1","Cd22",
                                   "Jaml","Dio2","Hcar2","Dpep3",
                                   "Ptger3","Gpr171"))
        )  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

VlnPlot(GEX.seur, features = c("Klra17",
                                   "Nmur1",
                                   "Cdh17","Clec4a4","Dpep2","Ffar1",
                                   "F2rl3","Sirpb1a","Cyp1b1","Cd22",
                                   "Jaml","Dio2","Hcar2","Dpep3",
                                   "Ptger3","Gpr171"))  + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DoubletFinder

library(DoubletFinder)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR

## NULL
## [1] "Creating 1259 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 1259 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."

preAnno

GEX.seur$preAnno1 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(0)] <- "EOS1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(1)] <- "EOS2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(2)] <- "EOS3"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(3)] <- "EOS4"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(4)] <- "EOS5"

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(6)] <- "EpC/FIB"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(5)] <- "MAC"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(7)] <- "DC/T"

GEX.seur$preAnno1 <- factor(GEX.seur$preAnno1,
                            levels = c(paste0("EOS",1:5),
                                       "EpC/FIB",
                                       "MAC",
                                       "DC/T"))
GEX.seur$preAnno2 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(0:4)] <- "EOS"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(6)] <- "EpC/FIB"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(5)] <- "MAC"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(7)] <- "DC/T"

GEX.seur$preAnno2 <- factor(GEX.seur$preAnno2,
                            levels = c("EOS",
                                       "EpC/FIB",
                                       "MAC",
                                       "DC/T"))
ggsci::pal_igv("default")(40)
##  [1] "#5050FFFF" "#CE3D32FF" "#749B58FF" "#F0E685FF" "#466983FF" "#BA6338FF"
##  [7] "#5DB1DDFF" "#802268FF" "#6BD76BFF" "#D595A7FF" "#924822FF" "#837B8DFF"
## [13] "#C75127FF" "#D58F5CFF" "#7A65A5FF" "#E4AF69FF" "#3B1B53FF" "#CDDEB7FF"
## [19] "#612A79FF" "#AE1F63FF" "#E7C76FFF" "#5A655EFF" "#CC9900FF" "#99CC00FF"
## [25] "#A9A9A9FF" "#CC9900FF" "#99CC00FF" "#33CC00FF" "#00CC33FF" "#00CC99FF"
## [31] "#0099CCFF" "#0A47FFFF" "#4775FFFF" "#FFC20AFF" "#FFD147FF" "#990033FF"
## [37] "#991A00FF" "#996600FF" "#809900FF" "#339900FF"
scales::show_col(ggsci::pal_igv("default")(49))

color.pre1 <-  ggsci::pal_igv("default")(49)[c(1,5,7,31,33,
                                               19,
                                               16,
                                               2)]
color.pre2 <- ggsci::pal_igv("default")(49)[c(7,
                                               19,
                                               16,
                                               2)]
(DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 3.5,repel = T,
          cols =color.pre1) + NoLegend()) +
  (DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, label.size = 3.5,repel = T,
          cols =color.pre2) + NoLegend())

DotPlot(GEX.seur, features = rev(c("Klra17",
                                   "Nmur1",
                                   "Cdh17","Clec4a4","Dpep2","Ffar1",
                                   "F2rl3","Sirpb1a","Cyp1b1","Cd22",
                                   "Jaml","Dio2","Hcar2","Dpep3",
                                   "Ptger3","Gpr171")), group.by = "preAnno1")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

VlnPlot(GEX.seur, features = c("Klra17",
                                   "Nmur1",
                                   "Cdh17","Clec4a4","Dpep2","Ffar1",
                                   "F2rl3","Sirpb1a","Cyp1b1","Cd22",
                                   "Jaml","Dio2","Hcar2","Dpep3",
                                   "Ptger3","Gpr171"), group.by = "preAnno1")  + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

VlnPlot(GEX.seur, features = c("Ptprc",
                                   "Cd3d","Cd3e",
                                   "Fcer1g",
                                   "Itgam","Ccr3","Fcgr3","Siglecf",
                                   "Adgre1","Itga4","Il5ra",
                                   "Nmur1",
                                   "Il1b","Ccl4","Ccl6","Csf1",
                                   "Pecam1",
                                   "Epcam","Tspan8","Krt19","Cldn7",
                                   "Cd3g","Ifng","Ramp3","Ctla4",
                                   "Dcn","Col1a1","Col1a2","Col3a1",
                                   "Rgl1",
                                   "Ly6e","Siglech",
                                   "Itgae","Pkib","Havcr2","Ly6c2",
                                   "H2-DMb1","H2-Eb1","H2-Ab1","H2-Aa",
                                   "Apoe","Lyz2","Ms4a6c","Mafb",
                                   "C1qa","C1qb","C1qc"), group.by = "preAnno1")  + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

re-check QC

#color.pre1 <- c(scales::hue_pal()(8))
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "preAnno1",cols =color.pre1) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "preAnno1",cols =color.pre1)

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "preAnno2",cols =color.pre2) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "preAnno2",cols =color.pre2)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "preAnno1",cols =color.pre1)

VlnPlot(subset(GEX.seur,subset=nCount_RNA<15000), features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "preAnno1",cols =color.pre1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "preAnno2",cols =color.pre2)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "preAnno2",cols =color.pre2) 
             #coord_cartesian(xlim =c(0, 40000), ylim = c(0, 40))
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "preAnno2",cols =color.pre2)
             #coord_cartesian(xlim =c(0, 40000), ylim = c(0, 4000))
plota + plotb

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "preAnno1",cols =color.pre1) 
             #coord_cartesian(xlim =c(0, 40000), ylim = c(0, 40))
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "preAnno1",cols =color.pre1)
             #coord_cartesian(xlim =c(0, 40000), ylim = c(0, 4000))
plota + plotb

plota <- FeatureScatter(subset(GEX.seur,subset=nCount_RNA<15000), feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "preAnno1",cols =color.pre1) 
             #coord_cartesian(xlim =c(0, 40000), ylim = c(0, 40))
plotb <- FeatureScatter(subset(GEX.seur,subset=nCount_RNA<15000), feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "preAnno1",cols =color.pre1)
             #coord_cartesian(xlim =c(0, 40000), ylim = c(0, 4000))
plota + plotb

table(data.frame(preA1=GEX.seur$preAnno1,
                 dbt0.05=GEX.seur$DoubletFinder0.05))
##          dbt0.05
## preA1     Doublet Singlet
##   EOS1        127     792
##   EOS2         14     818
##   EOS3         28     630
##   EOS4          3     610
##   EOS5          2     543
##   EpC/FIB       2      64
##   MAC          13      95
##   DC/T          0      35
table(data.frame(preA1=GEX.seur$preAnno1,
                 dbt0.1=GEX.seur$DoubletFinder0.1))
##          dbt0.1
## preA1     Doublet Singlet
##   EOS1        228     691
##   EOS2         40     792
##   EOS3         47     611
##   EOS4          6     607
##   EOS5          3     542
##   EpC/FIB       9      57
##   MAC          25      83
##   DC/T         20      15
#saveRDS(GEX.seur,"scEOS.preAnno_0705.rds")

check markers

Nmur1 SS2

Nmur1 P or N, DEGs using ‘FC>1.5 padj<0.01’

proc_DEG <- function(deg, p.cut=0.05, FC.cut = 2, padj=TRUE, abs=TRUE, mat_cut=NULL, gene_cut=NULL){
    rownames(deg) <- deg$gene
    
    if(padj==TRUE){
        deg <- deg %>% filter(padj < p.cut)
    }else{
        deg <- deg %>% filter(pvalue < p.cut)
    }
    
    if(abs==TRUE){
        deg <- deg %>% filter(abs(FC) > FC.cut)
    }else if(FC.cut >0){
        deg <- deg %>% filter(FC > FC.cut)
    }else{
        deg <- deg %>% filter(FC < FC.cut)
    }
    
    if(!is.null(mat_cut)){
        deg <- deg[rownames(deg) %in% rownames(mat_cut),]
    }
    if(!is.null(gene_cut)){
        deg <- deg[rownames(deg) %in% gene_cut,]
    }
    return(deg)
}
DEG_0608.SI_PN <- read.table("/Shared_win/projects/202205_Nmur1EOS/final3.1/edgeR_DEGs.SI_Nmur1.SI_P_vs_SI_N.csv", 
                           header = T, sep = ",")
rownames(DEG_0608.SI_PN) <- DEG_0608.SI_PN$gene
head(DEG_0608.SI_PN)
##          gene log2FoldChange   logCPM       LR       pvalue         padj
## Myc       Myc      -4.160795 6.152873 199.9002 2.195883e-45 1.756926e-41
## Clec4e Clec4e      -2.259573 9.068660 186.0219 2.348095e-42 9.393555e-39
## Snx10   Snx10      -2.168756 8.342083 180.7410 3.339058e-41 8.905268e-38
## H2-Q7   H2-Q7      -2.670811 8.156795 170.4181 5.995840e-39 1.199318e-35
## H2-Q6   H2-Q6      -2.820755 9.206747 169.6653 8.755237e-39 1.401013e-35
## Cxcl10 Cxcl10      -3.701292 6.957173 161.9878 4.162508e-37 5.550705e-34
##                FC
## Myc    -17.886451
## Clec4e  -4.788498
## Snx10   -4.496356
## H2-Q7   -6.367871
## H2-Q6   -7.065320
## Cxcl10 -13.007681
DEG_0608.SI_Pup <- proc_DEG(DEG_0608.SI_PN, abs = FALSE, p.cut = 0.01, FC.cut = 1.5, padj = T)$gene
DEG_0608.SI_Pup
##   [1] "Dpep2"         "Rcan1"         "Jaml"          "Cd22"         
##   [5] "Gtf2ird1"      "Cd33"          "Dio2"          "Fndc5"        
##   [9] "Lyz2"          "Cd300ld"       "Egr2"          "Nr4a2"        
##  [13] "Gpd1l"         "Myof"          "Gpr171"        "Ptgs1"        
##  [17] "Fgd2"          "Trim25"        "Cxcr2"         "Tnfrsf1a"     
##  [21] "Esam"          "Fcgr4"         "Cdh17"         "Prdm1"        
##  [25] "Isca1"         "Rgs1"          "Hdac5"         "Idi1"         
##  [29] "Slco4a1"       "Jakmip1"       "Maea"          "Nmur1"        
##  [33] "Pramef8"       "Dennd2a"       "Plscr1"        "Dok1"         
##  [37] "Slc38a1"       "Hmgcs1"        "Ank3"          "Gbp4"         
##  [41] "Mat2b"         "Csnk1e"        "Crtc3"         "Cd53"         
##  [45] "St18"          "Laptm4a"       "Msmo1"         "Insig1"       
##  [49] "Klra17"        "Plekhm3"       "Tspan13"       "Abcg2"        
##  [53] "Eepd1"         "Ly86"          "Gm20605"       "Ptger3"       
##  [57] "Il27"          "Fcrls"         "Osbpl9"        "Agmo"         
##  [61] "Zzz3"          "Grina"         "Gbp8"          "Mbnl2"        
##  [65] "Plk3"          "Ovca2"         "Cass4"         "Foxn3"        
##  [69] "Lyz1"          "Stx17"         "Slc23a2"       "Olfm4"        
##  [73] "Dnase1l3"      "Arid5b"        "Septin8"       "Trf"          
##  [77] "Slc25a13"      "Dusp16"        "AI987944"      "Snx19"        
##  [81] "St8sia4"       "F2rl3"         "Wsb2"          "Dcaf12"       
##  [85] "Rogdi"         "Rnf125"        "Slc40a1"       "Appl2"        
##  [89] "Fdft1"         "Itprip"        "Sirpb1a"       "Vav2"         
##  [93] "Ggt5"          "Ccnd2"         "Zfp512"        "Ctsl"         
##  [97] "Lgals8"        "Fxyd7"         "Pltp"          "Ddx20"        
## [101] "Golim4"        "Nr1d2"         "Epb41"         "Ttc28"        
## [105] "Elmsan1"       "Slc35a5"       "Plcxd2"        "Ffar1"        
## [109] "Pcyt2"         "Mylk"          "Slc15a4"       "Mcm6"         
## [113] "Mob3b"         "Fgl2"          "Calcoco1"      "Rexo5"        
## [117] "Ints12"        "Nfe2"          "Egr3"          "Maml2"        
## [121] "Zfp319"        "Slc39a4"       "St6galnac5"    "Cd84"         
## [125] "Ldlr"          "Htra2"         "Arrdc3"        "Trmt2b"       
## [129] "Srprb"         "Nudt4"         "Aco1"          "Mta3"         
## [133] "Arnt"          "Denn2b"        "Elmod2"        "Mpeg1"        
## [137] "Zfp318"        "Sqle"          "Adgrv1"        "Ets1"         
## [141] "BC052040"      "Slc24a3"       "Pgm2"          "Hsd17b7"      
## [145] "Vamp1"         "Trim12a"       "Srgap2"        "Gsdme"        
## [149] "Tpp2"          "Kcnip3"        "Rassf5"        "Las1l"        
## [153] "Stap1"         "Pdlim4"        "Tmem140"       "Adgrg3"       
## [157] "Cd68"          "Chd7"          "Zfp715"        "Rims3"        
## [161] "Defa17"        "Cep83"         "Ssh1"          "Pik3ip1"      
## [165] "Defa24"        "Fmo5"          "Acad11"        "Rfwd3"        
## [169] "Selenop"       "Ndc80"         "Zfp867"        "Mettl4"       
## [173] "Rasgef1b"      "Fdps"          "Stt3b"         "Tmem43"       
## [177] "Nlrp1a"        "Pgam1"         "Fasn"          "Nlrp12"       
## [181] "Dlg2"          "Ramp1"         "Aste1"         "Nek6"         
## [185] "Mfap1b"        "Lsp1"          "Golph3l"       "B3gnt7"       
## [189] "Dram2"         "Tctn3"         "Trim8"         "Stat4"        
## [193] "A130010J15Rik" "Idua"          "Galm"          "Fosl2"        
## [197] "Cytip"         "Hmox2"         "Il1r2"         "Clip1"        
## [201] "Socs2"         "Unc45a"        "Tinf2"         "Acot2"        
## [205] "Marchf7"       "Acvr1b"        "Traf7"         "Lpin1"        
## [209] "Tnfaip2"       "Gpr183"        "Irs2"          "Dtx4"         
## [213] "Krr1"          "Chd6"          "Lonp2"         "Nbr1"         
## [217] "Il13ra1"       "Tnfrsf26"      "Zswim4"        "Card11"       
## [221] "Nsun4"         "Cyp51"         "Aldh3b1"       "Csf3r"        
## [225] "Ss18"          "Zfp119b"       "Fnbp4"         "H2aw"         
## [229] "Nup155"        "Ccl4"          "Mcm7"          "Ppbp"         
## [233] "Timeless"      "Rab2b"         "Pnpo"          "Smg8"         
## [237] "Vps39"         "Dhcr7"         "Elmo2"         "Ankrd66"      
## [241] "Ace"           "Itpkb"         "Ubc"           "Hnrnpll"      
## [245] "Ercc5"         "Rrm1"          "Bex3"          "Xrcc1"        
## [249] "Notch4"        "Zfp110"        "Gab3"          "Nr4a3"        
## [253] "Tet2"          "Rcbtb2"        "Fos"           "Tgm1"         
## [257] "Frmd4b"        "Hp1bp3"        "Fam53c"        "Zscan26"      
## [261] "Hjurp"         "Ogfrl1"        "Stat2"         "Enpp5"        
## [265] "Atp8a1"        "Armcx5"        "Tbx21"         "Bbs9"         
## [269] "Nap1l5"        "Nemp2"         "Cyp1b1"        "Gm4707"       
## [273] "Zfp68"         "Copg1"         "Rapgef2"       "Gsto1"        
## [277] "Entpd1"        "Arhgef12"      "Tiam2"         "Crot"         
## [281] "Efcab14"       "Gpr55"         "Fam120b"       "Cers5"        
## [285] "Serinc5"       "Zdhhc9"        "Slc31a1"       "Gtpbp8"       
## [289] "Klhl21"        "Cln3"          "Sash1"         "Cd86"         
## [293] "Mpv17"         "Ppp1r3b"       "Pmvk"
DEG_0608.SI_Nup <- proc_DEG(DEG_0608.SI_PN, abs = FALSE, p.cut = 0.01, FC.cut = -1.5, padj = T)$gene
DEG_0608.SI_Nup
##   [1] "Myc"           "Clec4e"        "Snx10"         "H2-Q7"        
##   [5] "H2-Q6"         "Cxcl10"        "Icam1"         "Sod2"         
##   [9] "Slc2a6"        "Rps19"         "Nfkbia"        "Med11"        
##  [13] "Cs"            "Traf1"         "AB124611"      "Dusp2"        
##  [17] "C5ar1"         "Rpl10a"        "Ccl9"          "Gpr65"        
##  [21] "Clec4a2"       "Relb"          "Zc3h12a"       "Nfkbiz"       
##  [25] "Chst13"        "Itpkc"         "Olr1"          "Rpl13"        
##  [29] "Prkab2"        "Slpi"          "Tnf"           "Nfkbie"       
##  [33] "Slc11a1"       "Ehd1"          "Marcksl1"      "Rassf4"       
##  [37] "Ralgds"        "Padi4"         "Irak3"         "N4bp1"        
##  [41] "Acod1"         "Sema4d"        "Klhdc10"       "Eno1"         
##  [45] "Ier3"          "Rpl12"         "Il16"          "Cst7"         
##  [49] "Prdx5"         "Rack1"         "Nfkb2"         "Rps7"         
##  [53] "Pot1b"         "Rps10"         "Rpl8"          "Rpl5"         
##  [57] "Rpl32"         "Cd69"          "Fas"           "Gimap6"       
##  [61] "Capg"          "Rpsa"          "Slc39a1"       "Nfkbib"       
##  [65] "Rps2"          "Rps8"          "Ero1l"         "Ltb"          
##  [69] "Mapk6"         "Rps18"         "Rps15"         "Rps20"        
##  [73] "Atg16l2"       "Rpl13a"        "Pdlim7"        "Ccdc88b"      
##  [77] "Itgb7"         "Bcl3"          "Cd52"          "Trpm2"        
##  [81] "Notch1"        "Casp4"         "Rps5"          "Rplp0"        
##  [85] "Alox5ap"       "Rab32"         "Ston2"         "Ninj1"        
##  [89] "Itgal"         "Clec12a"       "Mrps18b"       "Rps15a"       
##  [93] "Rps16"         "Rnf149"        "Vasp"          "Ak2"          
##  [97] "Rps17"         "Foxn2"         "Rplp1"         "Txn1"         
## [101] "Rps3"          "C3ar1"         "Sell"          "Rps12"        
## [105] "H2-K1"         "Acp2"          "Cish"          "Rpl4"         
## [109] "Arap3"         "Emc6"          "Rpl17"         "Sec24a"       
## [113] "Emp3"          "Rnf19b"        "Rpl7"          "Adgre5"       
## [117] "Pus1"          "Dxo"           "Psme2"         "Apmap"        
## [121] "Mrpl16"        "Esrra"         "Il23a"         "Ccng2"        
## [125] "Rps27a"        "Gadd45b"       "Ifrd1"         "Dyrk2"        
## [129] "Nfat5"         "Rpl28"         "Nob1"          "Rps3a1"       
## [133] "Opa3"          "Slc29a2"       "Tnfaip3"       "Rps11"        
## [137] "Gm21188"       "Zdhhc18"       "Rps26"         "Rpl27a"       
## [141] "Mreg"          "Ggh"           "Il6"           "Rpl11"        
## [145] "Eef1g"         "Usp16"         "Rpl24"         "Tank"         
## [149] "Card19"        "Rpl6"          "Gpx1"          "Gmfg"         
## [153] "Rps4x"         "Gtf2ird2"      "Rps28"         "Tifa"         
## [157] "Ier5"          "Cd37"          "Rap1b"         "Rpl23"        
## [161] "Rpl14"         "Ctsz"          "Eif5a"         "Rpl18"        
## [165] "Cyba"          "Rpl23a"        "Map2k3"        "Herpud1"      
## [169] "S100a10"       "Mapkapk3"      "Rps6"          "Rpl21"        
## [173] "Id2"           "Txk"           "Ddit4"         "Rps24"        
## [177] "Arf4"          "Park7"         "Rplp2"         "Malt1"        
## [181] "Pilrb1"        "Rpl35"         "Hpn"           "Stk19"        
## [185] "Rps14"         "Eef1b2"        "Pa2g4"         "Rpl35a"       
## [189] "H2-Q4"         "Rpl15"         "Prg2"          "Rfx5"         
## [193] "Zeb2"          "Mapkapk2"      "Kdm6b"         "Rpl26"        
## [197] "Nop58"         "Rpl36a"        "Grcc10"        "Gpx4"         
## [201] "Zfp429"        "Serpinb1a"     "Lgals3"        "Uba52"        
## [205] "Noc2l"         "Rps23"         "Txnrd1"        "St3gal4"      
## [209] "C5ar2"         "Cst3"          "Pgk1"          "Orai2"        
## [213] "Rpl9"          "Rpl19"         "Rpl7a"         "Ssu72"        
## [217] "Sptssa"        "Rela"          "Rps25"         "Mllt6"        
## [221] "Atg9a"         "Fosl1"         "Siglecg"       "B2m"          
## [225] "Naca"          "Nampt"         "Cdk5r1"        "Cfb"          
## [229] "1700017B05Rik" "Tdg"           "Fgfr1"         "Rpl34"        
## [233] "Pi16"          "Asrgl1"        "Rpl39"         "Plek"         
## [237] "Gnai2"         "Pglyrp1"       "Nfkb1"         "Ifnar1"       
## [241] "Mtmr6"         "Snrpf"         "Tgfbi"         "Zfp593"       
## [245] "P2ry2"         "Slc38a2"       "G3bp1"         "Rab20"        
## [249] "Rpl30"         "Rpl27"         "Smdt1"         "Cxcl1"        
## [253] "Tgfb1"         "Igsf6"         "Anxa3"         "Ifngr2"       
## [257] "Bax"           "Sac3d1"        "Gna13"         "Cdc37"        
## [261] "S100a6"        "Rpl36al"       "Tpi1"          "Itgb1"        
## [265] "Serpinb2"      "Selenow"       "Birc3"         "Rpl37a"       
## [269] "Mif4gd"        "Il17ra"        "Trmt112"       "Pwp1"         
## [273] "Rpl18a"        "Tnip1"         "Rnd1"          "Tnfsf14"      
## [277] "Fau"           "Irf5"          "Osgep"         "Gm13212"      
## [281] "Nme2"          "Camk1"         "Arl13b"        "Mdh2"         
## [285] "Vdac3"         "Alas1"         "Apex1"         "Tagln2"       
## [289] "Rps13"         "Ly6e"          "Hint1"         "Pheta2"       
## [293] "Mrpl54"        "Rhov"          "Slc15a3"       "Cerk"         
## [297] "Ltv1"          "Gimap1"        "Wdr4"          "Llph"         
## [301] "Gpr84"         "Comtd1"        "Rpl41"         "Skil"         
## [305] "Sgms2"         "Aqp9"          "Prmt3"         "Dock10"       
## [309] "Sar1a"         "Tmcc3"         "Gimap5"        "Cflar"        
## [313] "Naga"          "Inpp5j"        "Ptgs2"         "Lfng"         
## [317] "Atp5h"         "Ppp1r11"       "Trp53"         "Ndufa6"       
## [321] "Krt80"         "Mrpl17"        "Mcl1"          "Ndufa13"      
## [325] "Rpl38"         "Chka"          "Rarg"          "Spata13"      
## [329] "Sec61b"        "Padi2"         "Cdc42ep2"      "Nedd4"        
## [333] "Gpr35"         "Arhgef3"       "Commd1b"       "Nme1"         
## [337] "Unc119"        "Crip1"         "Neurl3"        "Ap1s2"        
## [341] "Atp5e"         "Ttc39c"        "Zfp667"        "Rin3"         
## [345] "Irf2bp2"       "Cmtm7"         "Eef1akmt4"     "Pomp"         
## [349] "Aff1"          "Fam49b"        "Rcc2"          "Tgif1"        
## [353] "Crk"           "Sh2b2"         "Rbl2"          "Rpl37"        
## [357] "Tspo"          "Atad3a"        "Batf"          "Rps21"        
## [361] "Cox8a"         "Gnl1"          "Zc3hc1"        "Eef1e1"       
## [365] "Prelid1"       "Tsr1"          "Myl6"          "Gpatch4"      
## [369] "Nfkbid"        "Ube2e3"        "Eef1d"         "Naip5"        
## [373] "Klf10"         "Cotl1"         "Stk40"         "Dars"         
## [377] "Bst2"          "Arhgef18"      "Rrbp1"         "Vav3"         
## [381] "Pgls"          "Cox6a1"        "Rpl31"         "Rpl36"        
## [385] "Nqo1"          "Dot1l"         "Hspbp1"        "Cmpk1"        
## [389] "Agpat5"        "F10"           "Prkd3"         "H2-D1"        
## [393] "Klf7"          "Csrp1"         "Ube2f"         "Rps29"        
## [397] "Arfgef1"       "Cep85"         "Sema7a"        "Pfn1"         
## [401] "Fgfr1op"       "Fbxo11"        "Acot9"         "Flna"         
## [405] "Timm10"        "Adap1"         "Lacc1"         "Nhp2"         
## [409] "Gbp2"          "P2rx4"         "Gramd4"        "Vcl"          
## [413] "Hsd17b10"      "Rcn1"          "Sdf2"          "Ing2"         
## [417] "Mlec"          "Mrto4"         "Slc30a6"       "Stard5"       
## [421] "Il4"           "Lgmn"          "Nsa2"          "Tbc1d4"       
## [425] "Tmem63b"       "C1qbp"         "Sytl1"         "Baz1a"        
## [429] "Serbp1"        "Lmna"          "Tmem259"       "Eif3b"        
## [433] "Rbm8a"         "Taf7"          "Cox5b"         "Dennd4b"      
## [437] "Pabpn1"        "Ciapin1"       "Farsb"         "Ccdc9"        
## [441] "Katna1"        "Nadk"          "Mgst1"         "Ahnak"        
## [445] "Rpl22"         "Gpank1"        "Ebi3"          "Ten1"         
## [449] "Trem3"         "Lst1"          "Il1a"          "Ccl6"
signature score function
score.SI_Nup <- calculate_signature_score(as.data.frame(GEX.seur@assays[['RNA']]@data),
                                          DEG_0608.SI_Nup)
## Summarizing data
score.SI_Pup <- calculate_signature_score(as.data.frame(GEX.seur@assays[['RNA']]@data),
                                          DEG_0608.SI_Pup)
## Summarizing data
GEX.seur <- AddMetaData(GEX.seur,
                        score.SI_Nup,
                        "score.SI_Nup")
GEX.seur <- AddMetaData(GEX.seur,
                        score.SI_Pup,
                        "score.SI_Pup")
vln.score.SI_Nup <- GEX.seur@meta.data %>%
    ggplot(aes(preAnno1, score.SI_Nup, color = preAnno1)) +
    geom_violin(draw_quantiles=c(0.25,0.75),trim = FALSE) +
    #ylim(c(-0.25,0.35)) +
    ggbeeswarm::geom_quasirandom(size = 0.01, width = 0.2, alpha = 0.3) +
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black") +
    ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("EOS1","EOS2"),
                                                  c("EOS1","EOS3"),
                                                  c("EOS1","EOS4"),
                                                  c("EOS1","EOS5")),
                               #label.y = c(0.15,0.2,0.15,0.2,0.17,0.1)
                               ) +
     scale_color_manual(values = color.pre1) +
    labs(x="",title="Signature Score of SI Nmur1_PvsN, Nup") +
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(), 
          panel.background = element_blank(), 
          panel.border = element_rect(fill = NA)) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

vln.score.SI_Pup <- GEX.seur@meta.data %>%
    ggplot(aes(preAnno1, score.SI_Pup, color = preAnno1)) +
    geom_violin(draw_quantiles=c(0.25,0.75),trim = FALSE) +
    #ylim(c(-0.25,0.35)) +
    ggbeeswarm::geom_quasirandom(size = 0.01, width = 0.2, alpha = 0.3) +
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black") +
    ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("EOS5","EOS4"),
                                                  c("EOS5","EOS3"),
                                                  c("EOS5","EOS2"),
                                                  c("EOS1","EOS5")),
                               #label.y = c(0.15,0.2,0.15,0.2,0.17,0.1)
                               ) +
    scale_color_manual(values = color.pre1) +
    labs(x="",title="Signature Score of SI Nmur1_PvsN, Pup") +
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(), 
          panel.background = element_blank(), 
          panel.border = element_rect(fill = NA)) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
#vln.score.SI_Pup
#+ NoLegend()
cowplot::plot_grid(
  vln.score.SI_Nup,
  vln.score.SI_Pup,
  ncol = 2
)

cleaning up

recheck

distributions of nFeature/nCount/percent.mt, only show nCount<15k
and 2nd/3rd are DoubletFinder0.05/0.1 removed

# 
cowplot::plot_grid(
VlnPlot(subset(GEX.seur,subset=nCount_RNA<15000), 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "preAnno1",cols =color.pre1),

VlnPlot(subset(GEX.seur,subset=nCount_RNA<15000 & DoubletFinder0.05=="Singlet"), 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "preAnno1",cols =color.pre1),

VlnPlot(subset(GEX.seur,subset=nCount_RNA<15000 & DoubletFinder0.1=="Singlet"), 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "preAnno1",cols =color.pre1),
ncol = 1)

GEX.seur_pure <- subset(GEX.seur, subset= preAnno2 == "EOS" & DoubletFinder0.1 == "Singlet" & nCount_RNA < 5000 & percent.mt < 5)
GEX.seur_pure
## An object of class Seurat 
## 14783 features across 3205 samples within 1 assay 
## Active assay: RNA (14783 features, 1500 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
(DimPlot(GEX.seur_pure, reduction = "umap", group.by = "preAnno1", label = T, label.size = 3.5,repel = T,
          cols =color.pre1) + NoLegend()) +
  (DimPlot(GEX.seur_pure, reduction = "umap", group.by = "preAnno2", label = T, label.size = 3.5,repel = T,
          cols =color.pre2) + NoLegend())

sum(GEX.seur$preAnno2 == "EOS")
## [1] 3567
GEX.seur_pure
## An object of class Seurat 
## 14783 features across 3205 samples within 1 assay 
## Active assay: RNA (14783 features, 1500 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
#saveRDS(GEX.seur_pure,"scEOS.preAnno_0705.pure.rds")